package com.demo.ocr;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import net.sourceforge.tess4j.ITesseract.RenderedFormat;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;

/**
 * Converts an image-only PDF into a dual-layer PDF by running Tesseract OCR over it.
 *
 * <p>The engine is configured with the {@code chi_sim} language and the data directory in
 * {@code TESS_DATA}. The recognized text is also printed to standard error.
 *
 * @author 李德才
 * @version V2.0
 * @date 2021/7/15 14:22
 */
public class OcrForImagePdf {
  /** Directory handed to Tesseract as its data path. */
  private static final String TESS_DATA = "D:\\learn\\jfinal-demo\\src\\main\\resources\\tessdata";

  /** Default input document; despite the constant's name the path points at a PDF file. */
  private static final String IMAGE = "C:\\Users\\86159\\Desktop\\resoult\\resoultpdf.pdf";

  /** Default output base name handed to Tesseract for the rendered document. */
  private static final String OUTPUT_BASE = "C:\\Users\\86159\\Desktop\\resoult\\resoultImage";

  /**
   * Page iterator level used when collecting OCR results while rendering.
   * NOTE(review): 2 should correspond to RIL_TEXTLINE in Tess4J's TessPageIteratorLevel - confirm.
   */
  private static final int PAGE_ITERATOR_LEVEL = 2;

  /**
   * Runs the conversion on the built-in default input and output paths.
   *
   * @param args ignored
   * @throws TesseractException if recognition or document rendering fails
   */
  public static void main(String[] args) throws TesseractException {
    // Recognize the document and write the result out.
    overWritePdf();
  }

  /**
   * Runs OCR on the default input PDF and renders the result to the default output base.
   *
   * @throws TesseractException if recognition or document rendering fails
   */
  public static void overWritePdf() throws TesseractException {
    overWritePdf(IMAGE, OUTPUT_BASE);
  }

  /**
   * Runs OCR on {@code pdfPath}, prints the recognized text to standard error, and renders a
   * PDF document to {@code outputBase}.
   *
   * @param pdfPath path of the document to recognize
   * @param outputBase output base name passed to Tesseract for the rendered document
   * @throws TesseractException if recognition or document rendering fails; rendering failures
   *     are propagated to the caller rather than only being logged
   */
  public static void overWritePdf(String pdfPath, String outputBase) throws TesseractException {
    Tesseract tesseract = new Tesseract();
    tesseract.setDatapath(TESS_DATA);
    tesseract.setLanguage("chi_sim");

    String result = tesseract.doOCR(new File(pdfPath));
    // Emit the text before rendering so it is not lost if document creation fails below.
    System.err.println(result);

    List<RenderedFormat> formats = new ArrayList<>();
    formats.add(RenderedFormat.PDF);
    // Deliberately not wrapped in try/catch: a failed render must not look like success.
    tesseract.createDocumentsWithResults(pdfPath, outputBase, formats, PAGE_ITERATOR_LEVEL);
  }
}
